import requests

import re

class Spider:
    """Download a web page once and extract data from it with regexes.

    The page at ``url`` is fetched during construction; its decoded text is
    stored in the ``html`` attribute, which :meth:`info` then searches.
    """

    # Chrome-style User-Agent header sent with every request.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36'}

    def __init__(self,
                 url,
                 charset='utf-8',
                 json=None,
                 timeout=10,
                 ):
        """Store the settings and download the page immediately.

        Args:
            url: Address of the page to download.
            charset: Encoding used to decode the response body.
            json: Stored on the instance as ``self.json``; not otherwise
                used by this class.
            timeout: Seconds to wait for the server before ``requests``
                gives up; ``None`` waits indefinitely.

        Raises:
            requests.exceptions.RequestException: If the download fails.
        """
        self.url = url
        self.json = json
        self.charset = charset
        self.timeout = timeout
        # ``html`` is a plain string attribute.  The download method is
        # deliberately named ``fetch`` so the attribute does not shadow it
        # (previously both were called ``html`` and the method became
        # uncallable after construction).
        self.html = self.fetch()

    def fetch(self):
        """Download ``self.url`` and return the body decoded as text.

        Returns:
            The response text, decoded using ``self.charset``.
        """
        res = requests.get(self.url, headers=self.header, timeout=self.timeout)
        # Force the configured charset instead of the one requests guesses.
        res.encoding = self.charset
        return res.text

    def info(self, **regex):
        """Run each named regular expression against the downloaded page.

        Args:
            **regex: Mapping of result name to regular-expression pattern.

        Returns:
            A dict mapping each name to the ``re.findall`` result for its
            pattern (a list of strings, or of tuples when the pattern has
            several groups).
        """
        return {
            key: re.findall(pattern, self.html)
            for key, pattern in regex.items()
        }
            

if __name__ == '__main__':
    # Spider requires the page URL; the page is downloaded on construction.
    s = Spider('https://www.x23us.com/html/28/28373/')
    # On an instance, ``html`` is the downloaded text (a str), not a callable.
    html = s.html
    # Extract the book metadata and chapter links with named patterns.
    info = s.info(book_name='<meta name="og:novel:book_name" content="(.*?)"/>',
                  book_author='<meta name="og:novel:author" content="(.*?)"/>',
                  book_info='<td class="L"><a href="(.*?).html">(.*?)</a></td>')
                 
    





















    
